1 Introduction

In this notbook we are going to filter link by DARs and DEA in order to analyse those peak that are deferentially expressed and deferentially accessible

2 Pre-processing

2.1 Load package

library(RColorBrewer)
library(Signac)
library(Seurat)
library(GenomicRanges)
library(future)
library(harmony)
library(EnsDb.Hsapiens.v86)
library(stringr)
library(dplyr)
library(ggplot2)
library(patchwork)
library(kableExtra)
library(tidyverse)
library(BSgenome.Hsapiens.UCSC.hg38)
library(plyr)
library(biomaRt)
library(BiocManager)
library(GenomicRanges)
set.seed(123)

2.2 Parameters

# Paths
path_to_obj_links_DARs_DEA <- ("~/Documents/multiome_tonsil_Lucia/results/R_objects/19.tonsil_multiome_bcell_level_1_Linkpeak_DARs_DEA.rds")
path_to_obj_links <- ("~/Documents/multiome_tonsil_Lucia/results/R_objects/19.tonsil_multiome_bcell_level_1_Linkpeak.rds")
path_to_markers<-("~/Documents/multiome_tonsil_Lucia/results/tables/18_tonsil_markers_level_1.csv")
path_to_join_link_DARs_DEA<-  ("~/Documents/multiome_tonsil_Lucia/results/tables/19.df_joint_link_DARs_DEA_level_1.csv")
path_to_join_link_DARs <- ("~/Documents/multiome_tonsil_Lucia/results/tables/19.df_joint_link_DARs_level_1.csv")
path_to_DARs<-("~/Documents/multiome_tonsil_Lucia/results/tables/19.df_DARs_level_1.csv")

2.3 Load data

tonsil_wnn_bcell_links_DARs_DEA <- readRDS(path_to_obj_links_DARs_DEA)
tonsil_wnn_bcell_links <- readRDS(path_to_obj_links)
tonsil_DARs<-read_csv(path_to_DARs)
## Rows: 7837 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): cluster, gene, chrom
## dbl (7): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, start, end
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
tonsil_markers<-read_csv(path_to_markers)
## New names:
## * `` -> ...1
## Rows: 4431 Columns: 8── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): ...1, gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_link_DARs<-read_csv(path_to_join_link_DARs)
## Rows: 568 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): seqnames, strand, gene, peak, cluster
## dbl (13): start, end, width, score, zscore, pvalue, p_val, avg_log2FC, pct.1...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_link_DARs_DEA<- read_csv(path_to_join_link_DARs_DEA)
## Rows: 542 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): seqnames, strand, gene, peak, cluster, p_val.rna
## dbl (18): start, end, width, score, zscore, pvalue, p_val.dars, avg_log2FC.d...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

3 UMAP

3.1 Rename

Idents(tonsil_wnn_bcell_links)<-"wsnn_res.0.05"
new.cluster.ids <- c("NBC","MBC", "GC/DZ", "GC/LZ","PC")

names(new.cluster.ids) <- levels(tonsil_wnn_bcell_links)
tonsil_wnn_bcell_links <- RenameIdents(tonsil_wnn_bcell_links, new.cluster.ids)
tonsil_wnn_bcell_links[["annotation_level_1"]] <- Idents(object = tonsil_wnn_bcell_links)
Idents(tonsil_wnn_bcell_links_DARs_DEA)<-Idents(tonsil_wnn_bcell_links)
DimPlot(tonsil_wnn_bcell_links,label = TRUE, reduction = "wnn.umap", pt.size = 0.5,cols = c("#a6cee3", "#1f78b4","#b2df8a", 
             "#33a02c", "#fb9a99","#e31a1c")) + ggtitle("Joint UMAP")+NoLegend()

5 DARs

DT::datatable(tonsil_DARs)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
kbl(head(df_link_DARs_DEA),caption = "Table of the join of the all links peaks and the DARs") %>%
  kable_paper("striped", full_width = F)
Table 1: Table of the join of the all links peaks and the DARs
seqnames start end width strand score gene peak zscore pvalue p_val.dars avg_log2FC.dars pct.1.dars pct.2.dars p_val_adj.dars cluster start.peak end.peak p_val.rna avg_log2FC.rna pct.1.rna pct.2.rna p_val_adj.rna p_val_adj.y
chr3 1381223 187745727 186364505
0.1209302 BCL6 chr3-1380485-1381960 1.959346 0.0250361 0 0.2983296 0.176 0.069 0 GC/LZ 1380485 1381960 BCL61 0 1.1745827 0.658 0.197 0
chr3 8501630 187745727 179244098
0.1294057 BCL6 chr3-8501296-8501964 2.002691 0.0226052 0 0.2607253 0.312 0.125 0 GC/DZ 8501296 8501964 BCL6 0 0.8223499 0.731 0.175 0
chr3 8658427 187745727 179087301
0.1187620 BCL6 chr3-8657833-8659020 1.910297 0.0280475 0 0.2907075 0.123 0.029 0 GC/DZ 8657833 8659020 BCL6 0 0.8223499 0.731 0.175 0
chr3 9653316 187745727 178092412
0.1859457 BCL6 chr3-9652401-9654230 3.790392 0.0000752 0 0.3467498 0.313 0.103 0 GC/DZ 9652401 9654230 BCL6 0 0.8223499 0.731 0.175 0
chr3 9653316 187745727 178092412
0.1859457 BCL6 chr3-9652401-9654230 3.790392 0.0000752 0 0.2631839 0.242 0.120 0 GC/LZ 9652401 9654230 BCL61 0 1.1745827 0.658 0.197 0
chr3 9752268 187745727 177993460
0.1983285 BCL6 chr3-9751790-9752745 4.396954 0.0000055 0 0.3858557 0.296 0.085 0 GC/DZ 9751790 9752745 BCL6 0 0.8223499 0.731 0.175 0

6 Joint DEA

DT::datatable(df_link_DARs_DEA,caption = "Table of the filtered links peaks by DARs and GEx", class = 'cell-border stripe')
kbl(head(df_link_DARs_DEA),caption = "Table of the filtered links peaks by DARs and GEx") %>%
  kable_paper("striped", full_width = F)
Table 2: Table of the filtered links peaks by DARs and GEx
seqnames start end width strand score gene peak zscore pvalue p_val.dars avg_log2FC.dars pct.1.dars pct.2.dars p_val_adj.dars cluster start.peak end.peak p_val.rna avg_log2FC.rna pct.1.rna pct.2.rna p_val_adj.rna p_val_adj.y
chr3 1381223 187745727 186364505
0.1209302 BCL6 chr3-1380485-1381960 1.959346 0.0250361 0 0.2983296 0.176 0.069 0 GC/LZ 1380485 1381960 BCL61 0 1.1745827 0.658 0.197 0
chr3 8501630 187745727 179244098
0.1294057 BCL6 chr3-8501296-8501964 2.002691 0.0226052 0 0.2607253 0.312 0.125 0 GC/DZ 8501296 8501964 BCL6 0 0.8223499 0.731 0.175 0
chr3 8658427 187745727 179087301
0.1187620 BCL6 chr3-8657833-8659020 1.910297 0.0280475 0 0.2907075 0.123 0.029 0 GC/DZ 8657833 8659020 BCL6 0 0.8223499 0.731 0.175 0
chr3 9653316 187745727 178092412
0.1859457 BCL6 chr3-9652401-9654230 3.790392 0.0000752 0 0.3467498 0.313 0.103 0 GC/DZ 9652401 9654230 BCL6 0 0.8223499 0.731 0.175 0
chr3 9653316 187745727 178092412
0.1859457 BCL6 chr3-9652401-9654230 3.790392 0.0000752 0 0.2631839 0.242 0.120 0 GC/LZ 9652401 9654230 BCL61 0 1.1745827 0.658 0.197 0
chr3 9752268 187745727 177993460
0.1983285 BCL6 chr3-9751790-9752745 4.396954 0.0000055 0 0.3858557 0.296 0.085 0 GC/DZ 9751790 9752745 BCL6 0 0.8223499 0.731 0.175 0

7 Ceverage plots

idents.plot <- c("NBC","MBC", "GC/DZ", "GC/LZ","PC")
coverage_extend("BCL6",c(0,2000,1e+4,1e+5,1e+6),tonsil_wnn_bcell_links)
## [[1]]
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]
## Warning: Removed 22 rows containing missing values (geom_segment).

coverage_extend("BCL6",c(0,2000,1e+4,1e+5,1e+6),tonsil_wnn_bcell_links_DARs_DEA)
## [[1]]
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]
## Warning: Removed 22 rows containing missing values (geom_segment).

coverage_extend("PRDM1",c(0,2000,1e+4,1e+5,1e+6),tonsil_wnn_bcell_links)
## [[1]]
## Warning: Removed 55 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

## 
## [[2]]
## Warning: Removed 55 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

## 
## [[3]]
## Warning: Removed 54 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

## 
## [[4]]
## Warning: Removed 40 rows containing missing values (geom_segment).

## 
## [[5]]
## Warning: Removed 2 rows containing missing values (geom_segment).

coverage_extend("PRDM1",c(0,2000,1e+4,1e+5,1e+6),tonsil_wnn_bcell_links_DARs_DEA)
## [[1]]
## Warning: Removed 55 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

## 
## [[2]]
## Warning: Removed 55 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

## 
## [[3]]
## Warning: Removed 54 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

## 
## [[4]]
## Warning: Removed 40 rows containing missing values (geom_segment).

## 
## [[5]]
## Warning: Removed 2 rows containing missing values (geom_segment).

8 Tables

Here we can see the comparison of how many links are in each cluster and how we filter specifics peaks that are DA and belongs to DE genes of each cluster.

Doing so we can know which links are cause by a cluster.

table(df_link_DARs_DEA$gene=="BCL6",df_link_DARs_DEA$cluster)
##        
##         GC/DZ GC/LZ  PC
##   FALSE     0     0  80
##   TRUE    341   121   0
table(df_link_DARs$gene=="BCL6",df_link_DARs$cluster)
##        
##         GC/DZ GC/LZ NBC  PC
##   FALSE    15     1   0  80
##   TRUE    341   121   1   9

9 Session info

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] es_ES.UTF-8/es_ES.UTF-8/es_ES.UTF-8/C/es_ES.UTF-8/es_ES.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] BiocManager_1.30.16               biomaRt_2.50.3                   
##  [3] plyr_1.8.6                        BSgenome.Hsapiens.UCSC.hg38_1.4.4
##  [5] BSgenome_1.62.0                   rtracklayer_1.54.0               
##  [7] Biostrings_2.62.0                 XVector_0.34.0                   
##  [9] forcats_0.5.1                     purrr_0.3.4                      
## [11] readr_2.1.2                       tidyr_1.2.0                      
## [13] tibble_3.1.6                      tidyverse_1.3.1                  
## [15] kableExtra_1.3.4                  patchwork_1.1.1                  
## [17] ggplot2_3.3.5                     dplyr_1.0.8                      
## [19] stringr_1.4.0                     EnsDb.Hsapiens.v86_2.99.0        
## [21] ensembldb_2.18.3                  AnnotationFilter_1.18.0          
## [23] GenomicFeatures_1.46.5            AnnotationDbi_1.56.2             
## [25] Biobase_2.54.0                    harmony_0.1.0                    
## [27] Rcpp_1.0.8.3                      future_1.24.0                    
## [29] GenomicRanges_1.46.1              GenomeInfoDb_1.30.1              
## [31] IRanges_2.28.0                    S4Vectors_0.32.3                 
## [33] BiocGenerics_0.40.0               SeuratObject_4.0.4               
## [35] Seurat_4.1.0                      Signac_1.6.0                     
## [37] RColorBrewer_1.1-2                BiocStyle_2.22.0                 
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3              SnowballC_0.7.0            
##   [3] scattermore_0.8             bit64_4.0.5                
##   [5] knitr_1.37                  irlba_2.3.5                
##   [7] DelayedArray_0.20.0         data.table_1.14.2          
##   [9] rpart_4.1-15                KEGGREST_1.34.0            
##  [11] RCurl_1.98-1.6              generics_0.1.2             
##  [13] cowplot_1.1.1               RSQLite_2.2.10             
##  [15] RANN_2.6.1                  bit_4.0.4                  
##  [17] tzdb_0.2.0                  spatstat.data_2.1-2        
##  [19] webshot_0.5.2               xml2_1.3.3                 
##  [21] lubridate_1.8.0             httpuv_1.6.5               
##  [23] SummarizedExperiment_1.24.0 assertthat_0.2.1           
##  [25] xfun_0.30                   hms_1.1.1                  
##  [27] jquerylib_0.1.4             evaluate_0.15              
##  [29] promises_1.2.0.1            fansi_1.0.2                
##  [31] restfulr_0.0.13             progress_1.2.2             
##  [33] dbplyr_2.1.1                readxl_1.3.1               
##  [35] igraph_1.2.11               DBI_1.1.2                  
##  [37] htmlwidgets_1.5.4           sparsesvd_0.2              
##  [39] spatstat.geom_2.3-2         ellipsis_0.3.2             
##  [41] crosstalk_1.2.0             backports_1.4.1            
##  [43] bookdown_0.25               deldir_1.0-6               
##  [45] MatrixGenerics_1.6.0        vctrs_0.3.8                
##  [47] ROCR_1.0-11                 abind_1.4-5                
##  [49] cachem_1.0.6                withr_2.5.0                
##  [51] ggforce_0.3.3               vroom_1.5.7                
##  [53] sctransform_0.3.3           GenomicAlignments_1.30.0   
##  [55] prettyunits_1.1.1           goftest_1.2-3              
##  [57] svglite_2.1.0               cluster_2.1.2              
##  [59] lazyeval_0.2.2              crayon_1.5.0               
##  [61] pkgconfig_2.0.3             slam_0.1-50                
##  [63] labeling_0.4.2              tweenr_1.0.2               
##  [65] nlme_3.1-153                ProtGenerics_1.26.0        
##  [67] rlang_1.0.2                 globals_0.14.0             
##  [69] lifecycle_1.0.1             miniUI_0.1.1.1             
##  [71] filelock_1.0.2              BiocFileCache_2.2.1        
##  [73] modelr_0.1.8                cellranger_1.1.0           
##  [75] polyclip_1.10-0             matrixStats_0.61.0         
##  [77] lmtest_0.9-39               Matrix_1.3-4               
##  [79] ggseqlogo_0.1               zoo_1.8-9                  
##  [81] reprex_2.0.1                ggridges_0.5.3             
##  [83] png_0.1-7                   viridisLite_0.4.0          
##  [85] rjson_0.2.21                bitops_1.0-7               
##  [87] KernSmooth_2.23-20          blob_1.2.2                 
##  [89] parallelly_1.30.0           spatstat.random_2.1-0      
##  [91] scales_1.1.1                memoise_2.0.1              
##  [93] magrittr_2.0.2              ica_1.0-2                  
##  [95] zlibbioc_1.40.0             compiler_4.1.2             
##  [97] BiocIO_1.4.0                fitdistrplus_1.1-8         
##  [99] Rsamtools_2.10.0            cli_3.2.0                  
## [101] listenv_0.8.0               pbapply_1.5-0              
## [103] MASS_7.3-54                 mgcv_1.8-38                
## [105] tidyselect_1.1.2            stringi_1.7.6              
## [107] highr_0.9                   yaml_2.3.5                 
## [109] ggrepel_0.9.1               grid_4.1.2                 
## [111] sass_0.4.0                  fastmatch_1.1-3            
## [113] tools_4.1.2                 future.apply_1.8.1         
## [115] parallel_4.1.2              rstudioapi_0.13            
## [117] lsa_0.73.2                  gridExtra_2.3              
## [119] farver_2.1.0                Rtsne_0.15                 
## [121] digest_0.6.29               shiny_1.7.1                
## [123] qlcMatrix_0.9.7             broom_0.7.12               
## [125] later_1.3.0                 RcppAnnoy_0.0.19           
## [127] httr_1.4.2                  colorspace_2.0-3           
## [129] rvest_1.0.2                 XML_3.99-0.9               
## [131] fs_1.5.2                    tensor_1.5                 
## [133] reticulate_1.24             splines_4.1.2              
## [135] uwot_0.1.11                 RcppRoll_0.3.0             
## [137] spatstat.utils_2.3-0        plotly_4.10.0              
## [139] systemfonts_1.0.4           xtable_1.8-4               
## [141] jsonlite_1.8.0              R6_2.5.1                   
## [143] pillar_1.7.0                htmltools_0.5.2            
## [145] mime_0.12                   DT_0.21                    
## [147] glue_1.6.2                  fastmap_1.1.0              
## [149] BiocParallel_1.28.3         codetools_0.2-18           
## [151] utf8_1.2.2                  lattice_0.20-45            
## [153] bslib_0.3.1                 spatstat.sparse_2.1-0      
## [155] curl_4.3.2                  leiden_0.3.9               
## [157] magick_2.7.3                survival_3.2-13            
## [159] rmarkdown_2.13              docopt_0.7.1               
## [161] munsell_0.5.0               GenomeInfoDbData_1.2.7     
## [163] haven_2.4.3                 reshape2_1.4.4             
## [165] gtable_0.3.0                spatstat.core_2.4-0